the ranks will be a named num, with
entrezgene_id as name and stat (Wald Test) as
metric
getRanks <- function(res, annot) {
# only taking genes which have entrezgene_ids assigned to them
genes_with_entrez <- select(annot, GeneID, entrezgene_id) %>%
filter(!is.na(entrezgene_id))
ranks <- as.data.frame(res) %>%
tibble::rownames_to_column("GeneID") %>%
merge(genes_with_entrez, by = "GeneID") %>%
arrange(desc(stat)) %>%
select(entrezgene_id, stat) %>%
tibble::deframe() # creating a named num from two columns
return(ranks)
}
ranks.gastroc <- getRanks(res.gastroc, annot)
ranks.soleus <- getRanks(res.soleus, annot)
Pathways are provided by http://www.gsea-msigdb.org/gsea/msigdb/mouse/collections.jsp
For now the Canonical pathways are used. These gene sets represent biological a biological process. They are composed from the following databases taking a subset of CP:
| database | gene sets |
|---|---|
| BioCarta | 252 |
| Reactome | 1249 |
| WikiPathways | 186 |
Here, using maxSize = 200.
fgseaRes <- fgsea(
pathways = CGP,
stats = ranks,
minSize = 15,
maxSize = 200 # (see above the chunk if other value was used)
)
ordering pathways by padj values and using ES to
# ' obtain top pathways ordered by padj and use `ES` for up or down regulation
get_top_pathways <- function(fgseaRes, up = TRUE, pCutoff=0.01, n=10) {
.updown <- ifelse(up, `>`, `<`)
top.pathways <- fgseaRes %>%
filter(.updown(ES,0), padj < pCutoff) %>%
arrange(padj) %>%
slice_head(n=n)
return(top.pathways)
}
plot for top up and down regulated pathways
# ' plots top n enrichment plots for the given fgsea result
plot_top_enrichment <- function(fgseaRes, pathways, ranks, n = 9, up = TRUE) {
# extracting the top n pathways
top.pathways <- get_top_pathways(fgseaRes, up=up, pCutoff=params$pCutoff, n=n)
plot.list <- list()
# lims <- list("x" = c(0,17000), "y" = c(-0.8,0.0))
for (i in 1:nrow(top.pathways)) {
# filling plot.list with enrichmentPlots
# TODO: how can I use facet_wrap for this?
pathway <- top.pathways[i]$pathway
plt <- plotEnrichment(pathways[[pathway]], ranks) +
# TODO: adjust yaxis to the same scale
# TODO: keep axis.text.x only on the lower row
# TODO: keep axis.text.y only on the right column
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
) # +
# coord_cartesian(xlim = lims$x, ylim = lims$y)
plot.list[[i]] <- plt
}
arrange_plts(plot.list)
}
# ' helper function to arragen the plot from the enrichment
arrange_plts <- function(plt.list) {
nplts <- length(plt.list)
plt <- plt.list[1]
xlab <- plt$labels$x
ylab <- plt$labels$y
# set axis to the same scale
lims <- list("x" = c(0, 17000), "y" = c(-0.8, 0.0))
# remove axis
# arrange the plots
fig_labels <- LETTERS[1:nplts]
figure <- ggpubr::ggarrange(plotlist = plt.list,
labels = fig_labels) %>%
annotate_figure(left = text_grob(ylab, rot = 90),
bottom = text_grob(xlab))
return(figure)
}
# plot_labels <-
# data.frame("label" = LETTERS[1:10], "pathway" = top.pathways)
# knitr::kable(caption = "plot labels", plot_labels)
plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=T)
# TODO: add plot labels to return argument of plot_top_enrichment (use list probably)
plot_labels <-
data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
| label | pathway |
|---|---|
| A | WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA |
| B | WP_MICROGLIA_PATHOGEN_PHAGOCYTOSIS_PATHWAY |
| C | WP_APOPTOSIS |
| D | WP_FIBRIN_COMPLEMENT_RECEPTOR_3_SIGNALING_PATHWAY |
| E | REACTOME_IMMUNOREGULATORY_INTERACTIONS_BETWEEN_A_LYMPHOID_AND_A_NON_LYMPHOID_CELL |
| F | BIOCARTA_TNFR2_PATHWAY |
| G | WP_CHEMOKINE_SIGNALING_PATHWAY |
| H | REACTOME_ANTIMICROBIAL_PEPTIDES |
| I | REACTOME_FCGAMMA_RECEPTOR_FCGR_DEPENDENT_PHAGOCYTOSIS |
plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=F)
plot_labels <-
data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
| label | pathway |
|---|---|
| A | REACTOME_THE_CITRIC_ACID_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT |
| B | REACTOME_RESPIRATORY_ELECTRON_TRANSPORT_ATP_SYNTHESIS_BY_CHEMIOSMOTIC_COUPLING_AND_HEAT_PRODUCTION_BY_UNCOUPLING_PROTEINS |
| C | REACTOME_RESPIRATORY_ELECTRON_TRANSPORT |
| D | WP_ELECTRON_TRANSPORT_CHAIN |
| E | REACTOME_COMPLEX_I_BIOGENESIS |
| F | REACTOME_MITOCHONDRIAL_TRANSLATION |
| G | REACTOME_KEAP1_NFE2L2_PATHWAY |
| H | REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA |
| I | REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS |
plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=T)
plot_labels <-
data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
| label | pathway |
|---|---|
| A | REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE |
| B | REACTOME_FORMATION_OF_A_POOL_OF_FREE_40S_SUBUNITS |
| C | WP_CYTOPLASMIC_RIBOSOMAL_PROTEINS |
| D | REACTOME_NONSENSE_MEDIATED_DECAY_NMD_INDEPENDENT_OF_THE_EXON_JUNCTION_COMPLEX_EJC |
| E | WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA |
| F | REACTOME_EUKARYOTIC_TRANSLATION_INITIATION |
| G | REACTOME_NONSENSE_MEDIATED_DECAY_NMD |
| H | REACTOME_MAJOR_PATHWAY_OF_RRNA_PROCESSING_IN_THE_NUCLEOLUS_AND_CYTOSOL |
| I | REACTOME_INTERLEUKIN_7_SIGNALING |
plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=F)
plot_labels <-
data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
| label | pathway |
|---|---|
| A | REACTOME_KEAP1_NFE2L2_PATHWAY |
| B | REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS |
| C | REACTOME_ASYMMETRIC_LOCALIZATION_OF_PCP_PROTEINS |
| D | REACTOME_GLI3_IS_PROCESSED_TO_GLI3R_BY_THE_PROTEASOME |
| E | REACTOME_ACTIVATION_OF_APC_C_AND_APC_C_CDC20_MEDIATED_DEGRADATION_OF_MITOTIC_PROTEINS |
| F | REACTOME_UBIQUITIN_MEDIATED_DEGRADATION_OF_PHOSPHORYLATED_CDC25A |
| G | REACTOME_UCH_PROTEINASES |
| H | REACTOME_REGULATION_OF_MRNA_STABILITY_BY_PROTEINS_THAT_BIND_AU_RICH_ELEMENTS |
| I | REACTOME_RUNX1_REGULATES_TRANSCRIPTION_OF_GENES_INVOLVED_IN_DIFFERENTIATION_OF_HSCS |
top significant pathways:
# creating up and down regulated pathway vectors separately to maintain order
topUp <- get_top_pathways(fgseaRes.gastroc, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.gastroc, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
arrange(-NES) %>%
pull(pathway)
plotGseaTable(
pathways = CGP[topPathways],
stats = ranks.gastroc,
fgseaRes = fgseaRes.gastroc,
gseaParam = 0.5,
render = TRUE
) %>%
ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline
top significant pathways:
# creating up and down regulated pathway vectors separately to maintain order
topUp <- get_top_pathways(fgseaRes.soleus, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.soleus, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
arrange(-NES) %>%
pull(pathway)
plotGseaTable(
pathways = CGP[topPathways],
stats = ranks.soleus,
fgseaRes = fgseaRes.soleus,
gseaParam = 0.5,
render = TRUE
) %>%
ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline
using NES from the fgsea result filtering on the set
pCutoff=0.01 yields the following plot:
prepare_result <- function(fgseaRes, colname="NES", pCutoff) {
fgseaRes.prep <- fgseaRes %>%
data.frame() %>%
filter(padj < pCutoff) %>%
dplyr::rename(!!colname := NES) %>%
dplyr::select(!!colname, pathway)
return(fgseaRes.prep)
}
prep.gastroc <- prepare_result(fgseaRes.gastroc, colname="gastroc", params$pCutoff)
prep.soleus <- prepare_result(fgseaRes.soleus, colname="soleus", params$pCutoff)
# combining wald-Test data from both tissues and ordering in quadrants
res.combined <- merge(prep.gastroc,
prep.soleus,
by = "pathway") %>%
mutate(diff.exp = case_when(
gastroc < 0 & soleus < 0 ~ "both down",
gastroc > 0 & soleus > 0 ~ "both up",
gastroc < 0 & soleus > 0 ~ "ga down, sol up",
gastroc > 0 & soleus < 0 ~ "ga up, sol down",
TRUE ~ "different"
))
# only annotating top_n pathways by:
# removing all pathway_names except the top_n_pathways (sum of absolute NES numbers)
# top_n_pathways <- 10
# top_labels <- res.combined %>%
# group_by(diff.exp) %>%
# arrange(desc(abs(gastroc) + abs(soleus))) %>%
# filter(row_number() %in% c(1:top_n_pathways)) %>%
# ungroup() %>%
# .$pathway
# res.combined <- res.combined %>%
# mutate(pathway = ifelse(pathway %in% top_labels, pathway, ""))
# final plot
p <- ggplot(res.combined, aes(x = gastroc, y = soleus, text=pathway)) +
geom_vline(xintercept = 0) +
geom_hline(yintercept = 0) +
geom_point(aes(color = diff.exp)) +
# scale_color_manual(values = c("red", "chartreuse1", "bisque", "royalblue")) +
labs(x = "gastroc", y = "soleus") +
# ggrepel::geom_label_repel(max.overlaps = 20) +
ggtitle(label = "NES")
plotly::ggplotly(p, tooltip = "all")
ggplot(res.combined, aes(x = diff.exp)) +
geom_bar(aes(fill = diff.exp))